/*
* Terrier - Terabyte Retriever
* Webpage: http://terrier.org
* Contact: terrier{a.}dcs.gla.ac.uk
* University of Glasgow - School of Computing Science
* http://www.ac.gla.uk
*
* The contents of this file are subject to the Mozilla Public License
* Version 1.1 (the "License"); you may not use this file except in
* compliance with the License. You may obtain a copy of the License at
* http://www.mozilla.org/MPL/
*
* Software distributed under the License is distributed on an "AS IS"
* basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
* the License for the specific language governing rights and limitations
* under the License.
*
* The Original Code is InteractiveQuerying.java.
*
* The Original Code is Copyright (C) 2004-2011 the University of Glasgow.
* All Rights Reserved.
*
* Contributor(s):
* Gianni Amati <gba{a.}fub.it> (original author)
* Vassilis Plachouras <vassilis{a.}dcs.gla.ac.uk>
* Ben He <ben{a.}dcs.gla.ac.uk>
* Craig Macdonald <craigm{a.}dcs.gla.ac.uk>
*/
package org.terrier.applications;
import java.io.BufferedReader;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.PrintWriter;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import org.apache.log4j.Logger;
import org.terrier.matching.ResultSet;
import org.terrier.querying.Manager;
import org.terrier.querying.SearchRequest;
import org.terrier.structures.Index;
import org.terrier.structures.MetaIndex;
import org.terrier.utility.ApplicationSetup;
/**
* This class performs interactive querying at the command line. It asks
* for a query on Standard Input, and then prints the contents of the
* documents selected for the given query.
* <p><b>Properties:</b>
* <ul><li><tt>interactive.model</tt> - which weighting model to use, defaults to BM25</li>
* <li><tt>interactive.matching</tt> - which Matching class to use, defaults to Matching</li>
* <li><tt>interactive.manager</tt> - which Manager class to use, defaults to Manager</li>
* </ul>
* @author Gianni Amati, Vassilis Plachouras, Ben He, Craig Macdonald
*/
public class SentenceRetreivalNew {
/** The logger used */
protected static final Logger logger = Logger.getLogger(SentenceRetreivalNew.class);
/** Whether queries typed at the prompt are lowercased before processing. Set by property <tt>lowercase</tt>, defaults to true. */
protected final static boolean lowercase = Boolean.parseBoolean(ApplicationSetup.getProperty("lowercase", "true"));
/** Whether to display user prompts. */
protected boolean verbose = true;
/** The number of processed queries. */
protected int matchingCount = 0;
/** The writer handed to printResults; wraps standard output. */
protected PrintWriter resultFile = new PrintWriter(System.out);
/** The name of the manager object that handles the queries. Set by property <tt>interactive.manager</tt>, defaults to Manager. */
protected String managerName = ApplicationSetup.getProperty("interactive.manager", "Manager");
/** The query manager. */
protected Manager queryingManager;
/** The weighting model used. Set by property <tt>interactive.model</tt>, defaults to BM25. */
protected String wModel = ApplicationSetup.getProperty("interactive.model", "BM25");
/** The matching model used. Set by property <tt>interactive.matching</tt>, defaults to Matching. */
protected String mModel = ApplicationSetup.getProperty("interactive.matching", "Matching");
/** The data structures used. */
protected Index index;
/** The maximum number of presented results. Set by property <tt>interactive.output.format.length</tt>, defaults to 1000. */
protected static int RESULTS_LENGTH =
Integer.parseInt(ApplicationSetup.getProperty("interactive.output.format.length", "1000"));
/** The meta index keys, a comma-separated list. Set by property <tt>interactive.output.meta.keys</tt>, defaults to docno. */
protected String[] metaKeys = ApplicationSetup.getProperty("interactive.output.meta.keys", "docno").split("\\s*,\\s*");
/**
 * Builds a ready-to-use instance: the index is loaded first, and the
 * querying Manager is then created on top of it.
 */
public SentenceRetreivalNew() {
    this.loadIndex();
    this.createManager();
}
/**
 * Creates the querying manager named by <tt>managerName</tt> and stores it
 * in <tt>queryingManager</tt>. This method should be overridden if another
 * matching model is required.
 * <p>
 * A name without a package is resolved in <tt>org.terrier.querying</tt>; a
 * name in the legacy <tt>uk.ac.gla.terrier</tt> namespace is mapped to
 * <tt>org.terrier</tt>. The class is instantiated reflectively through its
 * <tt>(Index)</tt> constructor. If anything fails the problem is logged and
 * <tt>queryingManager</tt> keeps its previous value (null on first use).
 */
protected void createManager(){
    try {
        if (managerName.indexOf('.') == -1) {
            managerName = "org.terrier.querying." + managerName;
        } else if (managerName.startsWith("uk.ac.gla.terrier")) {
            // Literal replacement: replaceAll() would interpret the dots
            // of the package name as regex wildcards.
            managerName = managerName.replace("uk.ac.gla.terrier", "org.terrier");
        }
        queryingManager = (Manager) Class.forName(managerName)
            .getConstructor(new Class<?>[]{Index.class})
            .newInstance(new Object[]{index});
    } catch (Exception e) {
        // Best effort, as before, but no longer silent.
        logger.error("Problem loading Manager ("+managerName+"): ", e);
    }
}
/**
 * Loads the index from disk via <tt>Index.createIndex()</tt> and stores it
 * in <tt>index</tt>. A failed load (null index) is logged as fatal; on
 * success the loading time is logged at info level.
 */
protected void loadIndex(){
    final long startLoading = System.currentTimeMillis();
    index = Index.createIndex();
    if (index == null) {
        logger.fatal("Failed to load index. Perhaps index files are missing");
        return;
    }
    if (logger.isInfoEnabled()) {
        final long endLoading = System.currentTimeMillis();
        logger.info("time to initialise index : " + ((endLoading-startLoading)/1000.0D));
    }
}
/**
 * Closes the used structures. Does nothing when no index was loaded; a
 * failure while closing is logged as a warning and otherwise ignored.
 */
public void close() {
    if (index == null) {
        // loadIndex() failed earlier, so there is nothing to release.
        return;
    }
    try {
        index.close();
    } catch (IOException ioe) {
        logger.warn("Problem closing index", ioe);
    }
}
/**
 * Runs a single query through the querying manager (pre-processing,
 * matching, post-processing, post-filters) and hands the resulting
 * search request to <tt>printResults</tt>.
 * @param queryId String the query identifier to use.
 * @param query String the query to process.
 * @param cParameter double the value of the parameter to use; it is passed
 *        to the search request as control <tt>c</tt>.
 */
public void processQuery(String queryId, String query, double cParameter) {
    if (queryingManager == null) {
        // createManager() only logs its failure; report it here instead
        // of failing with a NullPointerException.
        logger.error("No querying manager available; cannot process query " + queryId);
        return;
    }
    SearchRequest srq = queryingManager.newSearchRequest(queryId, query);
    srq.setControl("c", Double.toString(cParameter));
    srq.addMatchingModel(mModel, wModel);
    matchingCount++;
    queryingManager.runPreProcessing(srq);
    queryingManager.runMatching(srq);
    queryingManager.runPostProcessing(srq);
    queryingManager.runPostFilters(srq);
    try {
        printResults(resultFile, srq);
    } catch (IOException ioe) {
        logger.error("Problem displaying results", ioe);
    }
}
/**
 * Reads queries from standard input, one per line, and processes each of
 * them with <tt>processQuery</tt>, using consecutive query ids starting
 * at 1. The loop ends at end of input, on an empty line, or when the user
 * types <tt>quit</tt> or <tt>exit</tt> (in any letter case). Queries are
 * lowercased first when the <tt>lowercase</tt> setting is enabled, and a
 * prompt is shown before each read when <tt>verbose</tt> is set.
 * @param cParameter the value of c
 */
public void processQueries(double cParameter) {
    try {
        // System.in is deliberately left open: this class does not own it.
        final BufferedReader consoleInput = new BufferedReader(new InputStreamReader(System.in));
        int qid = 1;
        String query;
        if (verbose) {
            System.out.print("Please enter your query: ");
        }
        while ((query = consoleInput.readLine()) != null) {
            // equalsIgnoreCase is locale-independent, unlike
            // toLowerCase().equals(..) under e.g. a Turkish default locale.
            if (query.length() == 0
                    || query.equalsIgnoreCase("quit")
                    || query.equalsIgnoreCase("exit")) {
                return;
            }
            processQuery("" + (qid++), lowercase ? query.toLowerCase() : query, cParameter);
            if (verbose) {
                System.out.print("Please enter your query: ");
            }
        }
    } catch (IOException ioe) {
        logger.error("Input/Output exception while performing the matching. Stack trace follows.", ioe);
    }
}
/**
 * Starts the document selection for the current parent result set.
 * <p>
 * Resolves the <tt>filename</tt> meta value of every docid in
 * <tt>docids</tt>, reads the file of the top-ranked document, runs its text
 * through <tt>obj1.main1</tt>, records the returned result set in a new
 * <tt>RSHolder</tt> registered in <tt>rsh</tt> and <tt>hp</tt>, and then
 * calls <tt>nextRSGenerator()</tt> for the remaining documents.
 * <p>
 * The per-query selection state (<tt>rsh</tt>, <tt>hp</tt>, <tt>n</tt>,
 * <tt>z</tt>, <tt>tempscore2</tt>) is reset first so that a second query in
 * the same session does not see leftovers of the previous one. Any failure
 * is logged and otherwise ignored, so nothing propagates to the caller.
 * @throws IOException declared for compatibility; failures are logged.
 */
public void firstRSGenerator()throws IOException{
    try {
        // Start every query from a clean slate.
        rsh.clear();
        hp.clear();
        n = 0;
        z = 0;
        java.util.Arrays.fill(tempscore2, 0.0);

        meta1 = index.getMetaIndex();
        name = meta1.getItems("filename", docids);
        if (name == null || name.length == 0) {
            logger.warn("No documents retrieved; nothing to select from");
            return;
        }

        // Read the content of the first document. Lines are concatenated
        // without a separator, as before.
        // NOTE(review): words at line boundaries get fused - confirm intended.
        final StringBuilder text = new StringBuilder();
        final BufferedReader read = new BufferedReader(new FileReader(name[0]));
        try {
            String line;
            while ((line = read.readLine()) != null) {
                text.append(line);
            }
        } finally {
            read.close();
        }
        // Assign rather than append: the field starts out null, and would
        // otherwise carry a "null" prefix plus the text of earlier queries.
        firstdocument = text.toString();

        // Generate a result set using the first document as the query.
        tempRS = obj1.main1(args1, firstdocument);

        // Record it in a holder, and register the holder under its docid.
        final RSHolder first = new RSHolder();
        first.set = tempRS;
        first.content = firstdocument;
        first.qid = docids[0];
        rsh.add(first);
        hp.put(docids[0], first);
        fpointer++;

        nextRSGenerator();
    } catch (Exception e) {
        // Best effort, as before: the query simply yields no output.
        logger.error("Could not process the top-ranked document of the result set", e);
    }
}
// ---- Working state of firstRSGenerator(), nextRSGenerator() and printer() ----
/** Holders created for the documents examined so far. */
ArrayList<RSHolder> rsh=new ArrayList<RSHolder>();
/** Holders keyed by docid; printer() prints the file of every key. */
static HashMap<Integer,RSHolder> hp=new HashMap<Integer,RSHolder>();
/** Meta index obtained from the index in firstRSGenerator(). */
static MetaIndex meta1;
/** Result set most recently returned by obj1.main1. */
ResultSet tempRS;
/** Helper whose main1(args, text) returns a ResultSet for the given text. */
Sim1 obj1=new Sim1();
/** Exclusive upper bound on the rank examined by nextRSGenerator(). */
int count=5;
/** Parent-result-set score of the document currently examined. */
double tempscore=0.0;
/** Not referenced elsewhere in this class. */
boolean flag=false;
/** n indexes docids for the hp lookups in nextRSGenerator(); p is not referenced (the loop there declares its own p). */
int n=0,p=0;
/** Holder most recently fetched from hp. */
RSHolder nextRSholder;
/** Result set of nextRSholder. */
ResultSet nextRS;
/** Not referenced elsewhere in this class. */
int infinity=9999;
/** Scores of nextRS; the initial array is replaced on first use. */
double[] nextscores=new double[1000];
/** Docids of nextRS. */
int[] nextdocids;
ResultSet set; //set is the Parent Result Set
/** Not referenced elsewhere in this class. */
int pointerOnParRS;
/** Docids of the parent result set; the initial array is replaced in printResults. */
int[] docids=new int[10000];
static String name[]; //this contains path or address of all results of Parent RS.
/** Text of the top-ranked document. */
String firstdocument;
/** Scratch strings for line-by-line file reading. */
String str123,str456,str789;
/** Command line arguments saved by main and passed on to obj1.main1. */
static String args1[];
/** Incremented by firstRSGenerator(); not read in this class. */
static int fpointer=0;
/** Not referenced elsewhere in this class. */
private ResultSet receivedSet[];
/** Text of the document currently examined by nextRSGenerator(). */
static String document;
/** Scores of the parent result set. */
double[] scores;
/** Next write position in tempscore2. */
static int z=0;
/** Length of nextdocids. */
int maxcount=0;
/** Position in nextdocids at which the examined docid was last found. */
static int indexval;
/**
 * Examines the documents ranked after the first one (ranks 1 up to, but
 * excluding, <tt>count</tt>) and finally calls <tt>printer()</tt>.
 * <p>
 * For each such document the file named by <tt>name[j]</tt> is read, its
 * text is run through <tt>obj1.main1</tt>, and the outcome is stored in a
 * new <tt>RSHolder</tt> appended to <tt>rsh</tt>. The document's score in
 * the parent result set is then checked with <tt>compare</tt> against the
 * maximum of <tt>tempscore2</tt>; when the check succeeds the holder is
 * stored in <tt>hp</tt>.
 * <p>
 * Failures inside the loop are logged and end the loop; <tt>printer()</tt>
 * is still called afterwards.
 * @throws FileNotFoundException if printer() cannot open a selected file.
 * @throws IOException if printer() fails to read a selected file.
 */
public void nextRSGenerator()throws FileNotFoundException,IOException{
    try {
        // Never walk past the end of a short result list; previously this
        // ended the loop through a swallowed ArrayIndexOutOfBoundsException.
        final int limit = Math.min(Math.min(count, name.length),
                                   Math.min(docids.length, scores.length));
        for (int j = 1; j < limit; j++) {
            // Read the next document of the result set. Lines are
            // concatenated without a separator, as before.
            final StringBuilder text = new StringBuilder();
            final BufferedReader read = new BufferedReader(new FileReader(name[j]));
            try {
                String line;
                while ((line = read.readLine()) != null) {
                    text.append(line);
                }
            } finally {
                read.close();
            }
            // Assign rather than append: the static field starts out null
            // and used to accumulate every previously read document.
            document = text.toString();

            // Record the result set obtained with this document as query.
            final RSHolder candidate = new RSHolder();
            rsh.add(candidate);
            tempRS = obj1.main1(args1, document);
            candidate.set = tempRS;
            candidate.content = document;
            candidate.qid = docids[j];

            // Score of this document in the parent result set.
            tempscore = scores[j];

            // Collect the score of this document in the result set held
            // for docids[n]. The containsKey test is part of the loop
            // condition so that a missing key ends the loop instead of
            // spinning forever.
            // NOTE(review): z is never reset here, so after eight entries
            // this loop is skipped for all later documents - confirm.
            while (!hp.isEmpty() && z <= 7 && hp.containsKey(docids[n])) {
                nextRSholder = hp.get(docids[n]);
                nextRS = nextRSholder.set;
                nextscores = nextRS.getScores();
                nextdocids = nextRS.getDocids();
                maxcount = nextdocids.length;
                // NOTE(review): indexval keeps its previous value when the
                // docid does not occur in nextdocids - confirm intended.
                for (int pos = 0; pos < maxcount; pos++) {
                    if (docids[j] == nextdocids[pos]) {
                        indexval = pos;
                    }
                }
                tempscore2[z] = nextscores[indexval];
                z++;
            }

            // NOTE(review): the holder is stored under docids[n], not under
            // its own docid docids[j] - kept as is, confirm intended.
            if (compare(tempscore, max(tempscore2))) {
                hp.put(docids[n], candidate);
            }
            n++;
        }
    } catch (Exception e) {
        // Best effort, as before: print whatever has been selected so far.
        logger.error("Could not process the remaining documents of the result set", e);
    }
    printer();
}
/** Weight used by compare(): sim1val is multiplied by lambda, sim2val by (1-lambda). */
static double lambda=0.0;
/** Value of the weighted difference most recently computed by compare(). */
double res;
/** Scores collected by nextRSGenerator(); max() is taken over the whole array, unused slots included. */
double[] tempscore2=new double[100];
/**
 * Weighs two values against each other using <tt>lambda</tt>. The weighted
 * difference <tt>lambda*sim1val - (1-lambda)*sim2val</tt> is kept in
 * <tt>res</tt>.
 * @param sim1val the value weighted by lambda.
 * @param sim2val the value weighted by (1-lambda).
 * @return true if the weighted difference is not negative.
 */
public boolean compare(double sim1val,double sim2val){
    final double weightedFirst = lambda * sim1val;
    final double weightedSecond = (1 - lambda) * sim2val;
    res = weightedFirst - weightedSecond;
    return res >= 0;
}
/**
 * Finds the largest value of an array.
 * @param nextscores the values to scan; must hold at least one element.
 * @return the largest element of the array.
 */
private double max(double[] nextscores) {
    // Seeding with element 0 keeps the failure on an empty array.
    double largest = nextscores[0];
    for (double candidate : nextscores) {
        if (largest < candidate) {
            largest = candidate;
        }
    }
    return largest;
}
// public void sim1func(String doc){
// obj.main1(args1, doc);
// }
/**
 * Presents the results of a search request. The result set of the request
 * becomes the parent result set (<tt>set</tt>, <tt>docids</tt>,
 * <tt>scores</tt>) and <tt>firstRSGenerator()</tt> is started on it.
 * <p>
 * The former tabular listing of the results is disabled, so the meta index
 * lookups that only fed that listing are no longer performed.
 * @param pw PrintWriter kept for interface compatibility; nothing is
 *        written to it by this method.
 * @param q SearchRequest the search request to get results from.
 * @throws IOException declared for compatibility with existing callers.
 */
public void printResults(PrintWriter pw, SearchRequest q) throws IOException {
    set = q.getResultSet();
    docids = set.getDocids();
    scores = set.getScores();
    firstRSGenerator();
}
// for (int i = start; i < end; i++) {
// sbuffer.append(i);
// sbuffer.append(" ");
// //sbuffer.append(docids[i]);
// for(metaKeyId = 0; metaKeyId < metaKeyCount; metaKeyId++)
// {
// sbuffer.append(docNames[metaKeyId][i]);
// sbuffer.append(" ");
// }
// sbuffer.append(docids[i]);
// sbuffer.append(" ");
// sbuffer.append(scores[i]);
// sbuffer.append('\n');
// }
//System.out.println(sbuffer.toString());
// pw.write(sbuffer.toString());
// pw.flush();
//pw.write("finished outputting\n");
//}
/**
 * Starts the interactive query application.
 * <ul>
 * <li>no arguments: queries are read from standard input with prompts;</li>
 * <li><tt>--noverbose</tt> alone: the same, without prompts;</li>
 * <li>anything else: the arguments, joined by spaces, are run as one
 * query with id <tt>CMDLINE</tt>.</li>
 * </ul>
 * The arguments are also kept in <tt>args1</tt>, and the index is closed
 * before returning.
 * @param args the command line arguments.
 */
public static void main(String[] args) {
    args1 = args;
    final SentenceRetreivalNew iq = new SentenceRetreivalNew();
    try {
        if (args.length == 0) {
            iq.processQueries(1.0);
        } else if (args.length == 1 && args[0].equals("--noverbose")) {
            iq.verbose = false;
            iq.processQueries(1.0);
        } else {
            iq.verbose = false;
            final StringBuilder s = new StringBuilder();
            for (String arg : args) {
                s.append(arg);
                s.append(" ");
            }
            iq.processQuery("CMDLINE", s.toString(), 1.0);
        }
    } finally {
        // Release the index structures; skipped when loading failed.
        if (iq.index != null) {
            iq.close();
        }
    }
}
/** Filename of the document most recently printed by printer(). */
String finaldocs;
/** Output text; empty again after every printer() call. */
String content = "";
/**
 * Prints, to standard output, the contents of the files of all documents
 * whose docids are keys of <tt>hp</tt>. Every line read is preceded by a
 * single space, the whole text is printed as one line after a leading
 * blank line, and a further blank line follows.
 * @throws IOException if a filename cannot be resolved or a file cannot
 *         be read.
 */
private void printer() throws IOException {
    final MetaIndex meta = index.getMetaIndex();
    final StringBuilder text = new StringBuilder(content);
    for (int docid : hp.keySet()) {
        finaldocs = meta.getItem("filename", docid);
        final BufferedReader reader = new BufferedReader(new FileReader(finaldocs));
        try {
            // A plain while loop: the former do/while appended " null"
            // for an empty file.
            String line;
            while ((line = reader.readLine()) != null) {
                text.append(' ').append(line);
            }
        } finally {
            reader.close();
        }
    }
    System.out.println("\n " + text);
    content = "";
    System.out.println("\n ");
}
}